In [ ]:
#!pip install deep_tabular_augmentation
import os
import numpy as np
import pandas as pd  # explicit: `pd` is used below and previously only arrived via the star import
import torch
from torch import nn
from torch import optim
from sklearn.preprocessing import StandardScaler
from functools import partial
from vpower.src.utils.auxiliary_functions import *
import matplotlib.pyplot as plt
In [ ]:
#!pip install sdv
In [ ]:
import sdv
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer
from sdv.lite import SingleTablePreset
from sdv.single_table import CTGANSynthesizer
from sdv.single_table import CopulaGANSynthesizer

LOAD DATA¶

In [ ]:
# Data locations, given relative to the notebook's working directory.
data_dir_s = "../../../data/synthetic_data"  # source of the "*_s" partitions
data_dir_r = "../../../data/real_data"  # source of the "*_r" partitions
# NOTE(review): data_dir_da is not referenced by any other cell visible in this notebook.
data_dir_da = "../../../data/augmented_data"
# "RD" -> train/dev_in use the real partitions only; any other value stacks real + synthetic.
data_tag = "RD"
In [ ]:
# Read every partition from both source directories into `data_all`, keyed
# "<partition>_s" (synthetic directory) and "<partition>_r" (real directory).
data_all = {}
for suffix, source_dir in (("_s", data_dir_s), ("_r", data_dir_r)):
    for split_name in ("train", "dev_in", "dev_out"):
        csv_path = os.path.join(source_dir, f"{split_name}.csv")
        data_all[split_name + suffix] = load_data_and_set_index(
            filepath=csv_path, index_column_name="time_id"
        )

# Decide what the unsuffixed "train" / "dev_in" entries hold:
# with data_tag "RD" they alias the real partitions, otherwise real rows are
# followed by the synthetic rows of the same partition.
use_real_only = data_tag == "RD"
for split_name in ("train", "dev_in"):
    real_part = data_all[split_name + "_r"]
    if use_real_only:
        data_all[split_name] = real_part
    else:
        data_all[split_name] = pd.concat([real_part, data_all[split_name + "_s"]])
    
    
In [ ]:
# Frame the synthesizers below are fitted on; the cell displays its row count.
real_data = data_all["train"]
len(real_data)
Out[ ]:
530706
In [ ]:
## Metadata Detection
In [ ]:
# Build an empty single-table metadata object and let SDV infer the column
# types from the training frame; the last expression displays the result.
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=real_data)
print("Auto detected data:\n")
metadata
Auto detected data:

Out[ ]:
{
    "columns": {
        "draft_aft_telegram": {
            "sdtype": "numerical"
        },
        "draft_fore_telegram": {
            "sdtype": "numerical"
        },
        "stw": {
            "sdtype": "numerical"
        },
        "diff_speed_overground": {
            "sdtype": "numerical"
        },
        "awind_vcomp_provider": {
            "sdtype": "numerical"
        },
        "awind_ucomp_provider": {
            "sdtype": "numerical"
        },
        "rcurrent_vcomp": {
            "sdtype": "numerical"
        },
        "rcurrent_ucomp": {
            "sdtype": "numerical"
        },
        "comb_wind_swell_wave_height": {
            "sdtype": "numerical"
        },
        "timeSinceDryDock": {
            "sdtype": "numerical"
        },
        "power": {
            "sdtype": "numerical"
        }
    },
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1"
}
In [ ]:
# GaussianCopulaSynthesizer
# Step 1: create the synthesizer from the detected metadata
synthesizer = GaussianCopulaSynthesizer(metadata)
# Step 2: train it on the real training rows
synthesizer.fit(real_data)
# Step 3: persist the fitted model next to the notebook
gaussian_copula_path = 'my_GaussianCopulaSynthesizer.pkl'
synthesizer.save(filepath=gaussian_copula_path)
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'stw'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'diff_speed_overground'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'awind_vcomp_provider'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'awind_ucomp_provider'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'rcurrent_vcomp'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'rcurrent_ucomp'. Data will not be rounded.
  warnings.warn(
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning: No rounding scheme detected for column 'comb_wind_swell_wave_height'. Data will not be rounded.
  warnings.warn(
In [ ]:
# CopulaGANSynthesizer (rebinds `synthesizer`, replacing whatever it held before)
# Step 1: create the synthesizer from the detected metadata
synthesizer = CopulaGANSynthesizer(metadata)
# Step 2: train it on the real training rows
synthesizer.fit(real_data)
# Step 3: persist the fitted model next to the notebook
copula_gan_path = 'my_CopulaGANSynthesizer.pkl'
synthesizer.save(filepath=copula_gan_path)
/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'stw'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'diff_speed_overground'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'awind_vcomp_provider'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'awind_ucomp_provider'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'rcurrent_vcomp'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'rcurrent_ucomp'. Data will not be rounded.

/home/user/.local/lib/python3.8/site-packages/rdt/transformers/numerical.py:100: UserWarning:

No rounding scheme detected for column 'comb_wind_swell_wave_height'. Data will not be rounded.

In [ ]:
# To reuse a previously saved model instead of the one fitted in this session:
# synthesizer = CopulaGANSynthesizer.load(filepath='my_CopulaGANSynthesizer.pkl')
# Step 3: draw synthetic rows from the current synthesizer and print how many came back
n_synthetic_rows = 25000
synthetic_data = synthesizer.sample(num_rows=n_synthetic_rows)
print(len(synthetic_data))
25000
In [ ]:
# Scatter of the synthetic rows: 'stw' on x, 'power' on y, coloured by one feature.
feature_under_study = "awind_vcomp_provider"

fig, ax = plt.subplots(figsize=(6, 6))
points = ax.scatter(
    synthetic_data["stw"],
    synthetic_data["power"] / 1e3,  # assumes power is stored in kW so this is MW — TODO confirm
    c=synthetic_data[feature_under_study],
    s=4,
    label="FAKE",
)
ax.legend(loc="upper left")
ax.set_xlabel("Speed (knots)")
ax.set_ylabel("Power (MW)")
#ax.set_xlim(4, 25)
#ax.set_ylim(-2, 45)
ax.grid()

# Colour bar for the scatter, labelled with the feature name and pinned to a fixed range.
cbar = fig.colorbar(points, ax=ax)
cbar.set_label(feature_under_study, rotation=90)
points.set_clim(-15, 50)

plt.show()
plt.close()
No description has been provided for this image

Evaluating real vs. synthetic data¶

In [ ]:
from sdv.evaluation.single_table import evaluate_quality

# Build SDV's quality report comparing the sampled rows with the real training
# rows under the detected metadata.
quality_report = evaluate_quality(
    real_data=real_data,
    synthetic_data=synthetic_data,
    metadata=metadata,
)
Creating report: 100%|██████████| 4/4 [00:07<00:00,  1.83s/it]
Overall Quality Score: 95.3%

Properties:
Column Shapes: 93.78%
Column Pair Trends: 96.83%

In [ ]:
# Render the quality report's visualization for the 'Column Shapes' property.
quality_report.get_visualization('Column Shapes')
In [ ]:
# Render the quality report's visualization for the 'Column Pair Trends' property.
quality_report.get_visualization('Column Pair Trends')
In [ ]:
from sdv.evaluation.single_table import get_column_plot

# Plot the 'power' column for the real vs. synthetic rows.
fig = get_column_plot(
    column_name='power',
    real_data=real_data, synthetic_data=synthetic_data, metadata=metadata,
)
fig.show()
In [ ]:
from sdv.evaluation.single_table import get_column_plot

# Plot the 'stw' column for the real vs. synthetic rows.
fig = get_column_plot(
    column_name='stw',
    real_data=real_data, synthetic_data=synthetic_data, metadata=metadata,
)
fig.show()
In [ ]:
from sdv.evaluation.single_table import get_column_pair_plot

# Plot the ('stw', 'power') column pair for the real vs. synthetic rows.
fig = get_column_pair_plot(
    column_names=['stw', 'power'],
    real_data=real_data, synthetic_data=synthetic_data, metadata=metadata,
)
fig.show()
In [ ]:
from sdv.evaluation.single_table import get_column_pair_plot

# Plot the ('awind_vcomp_provider', 'power') column pair for the real vs. synthetic rows.
fig = get_column_pair_plot(
    column_names=['awind_vcomp_provider', 'power'],
    real_data=real_data, synthetic_data=synthetic_data, metadata=metadata,
)
fig.show()
In [ ]:
# Row count of the real training frame (the frame the synthetic rows are concatenated onto below).
len(real_data)
Out[ ]:
530706
In [ ]:
# Stack the real training rows and the sampled synthetic rows into one augmented
# training set and write it to CSV.
#
# pd.concat keeps an index name only when all inputs share it. The sampled frame
# presumably has an unnamed default index (verify), so the real frame's index
# name (presumably "time_id" — set by load_data_and_set_index; verify) would be
# dropped and the CSV's index column written without a header. rename_axis
# restores it; when the real frame's index is unnamed this changes nothing.
#
# NOTE(review): synthetic rows keep their own index values, which may collide
# with real ones — confirm whether downstream code needs unique index values.
# NOTE(review): the file is written into the real-data directory although
# `data_dir_da` is defined and otherwise unused — confirm the intended location.
df_all = pd.concat([real_data, synthetic_data]).rename_axis(real_data.index.name)
df_all.to_csv(os.path.join(data_dir_r, 'da2_train.csv'))
In [ ]: